“players.csv” + “salaries_1985to2018.csv”: https://data.world/datadavis/nba-salaries
“all_seasons.csv”: https://www.kaggle.com/datasets/justinas/nba-players-data
library(knitr)
library(plyr)
library(dplyr)
library(tidyr)
library(caret)
library(ggplot2)
library(corrplot)
library(stringr)
library(scales)
library(randomForest)
library(psych)
library(glmnet)
library(rpart)
library(lubridate)
library(plotly)
opts_chunk$set(echo = TRUE, cache = TRUE)
opts_chunk$set(tidy.opts = list(width.cutoff = 60), tidy = TRUE, fig.height = 8, fig.width = 12)
Source: https://data.world/datadavis/nba-salaries
It has primary key id, their birth date, birth place, career statistics,
name, position
dim(players)
## [1] 4685 24
str(players)
## 'data.frame': 4685 obs. of 24 variables:
## $ X_id : chr "abdelal01" "abdulza01" "abdulka01" "abdulma02" ...
## $ birthDate : chr "June 24, 1968" "April 7, 1946" "April 16, 1947" "March 9, 1969" ...
## $ birthPlace : chr "Cairo, Egypt" "Brooklyn, New York" "New York, New York" "Gulfport, Mississippi" ...
## $ career_AST : num 0.3 1.2 3.6 3.5 1.1 2.5 1.2 1 0.7 0.5 ...
## $ career_FG. : chr "50.2" "42.8" "55.9" "44.2" ...
## $ career_FG3.: chr "0.0" "" "5.6" "35.4" ...
## $ career_FT. : chr "70.1" "72.8" "72.1" "90.5" ...
## $ career_G : int 256 505 1560 586 236 830 319 1 56 174 ...
## $ career_PER : chr "13.0" "15.1" "24.6" "15.4" ...
## $ career_PTS : num 5.7 9 24.6 14.6 7.8 18.1 5.6 0 9.5 5.3 ...
## $ career_TRB : chr "3.3" "8.0" "11.2" "1.9" ...
## $ career_WS : chr "4.8" "17.5" "273.4" "25.2" ...
## $ career_eFG.: chr "50.2" "" "55.9" "47.2" ...
## $ college : chr "Duke University" "Iowa State University" "University of California, Los Angeles" "Louisiana State University" ...
## $ draft_pick : chr "25th overall" "5th overall" "1st overall" "3rd overall" ...
## $ draft_round: chr "1st round" "1st round" "1st round" "1st round" ...
## $ draft_team : chr "Portland Trail Blazers" "Cincinnati Royals" "Milwaukee Bucks" "Denver Nuggets" ...
## $ draft_year : chr "1990" "1968" "1969" "1990" ...
## $ height : chr "6-10" "6-9" "7-2" "6-1" ...
## $ highSchool : chr "Bloomfield in Bloomfield, New Jersey" "John Jay in Brooklyn, New York" "Power Memorial in New York, New York" "Gulfport in Gulfport, Mississippi" ...
## $ name : chr "Alaa Abdelnaby" "Zaid Abdul-Aziz" "Kareem Abdul-Jabbar" "Mahmoud Abdul-Rauf" ...
## $ position : chr "Power Forward" "Power Forward and Center" "Center" "Point Guard" ...
## $ shoots : chr "Right" "Right" "Right" "Right" ...
## $ weight : chr "240lb" "235lb" "225lb" "162lb" ...
Source: https://data.world/datadavis/nba-salaries
It record the player_id (as a foreign key to the players.csv), their
salary, the season and the teams they played in.
dim(salaries)
## [1] 14163 7
str(salaries)
## 'data.frame': 14163 obs. of 7 variables:
## $ league : chr "NBA" "NBA" "NBA" "NBA" ...
## $ player_id : chr "abdelal01" "abdelal01" "abdelal01" "abdelal01" ...
## $ salary : int 395000 494000 500000 805000 650000 1530000 2030000 2000000 3000000 1660000 ...
## $ season : chr "1990-91" "1991-92" "1992-93" "1993-94" ...
## $ season_end : int 1991 1992 1993 1994 1995 1985 1986 1988 1989 1991 ...
## $ season_start: int 1990 1991 1992 1993 1994 1984 1985 1987 1988 1990 ...
## $ team : chr "Portland Trail Blazers" "Portland Trail Blazers" "Boston Celtics" "Boston Celtics" ...
Source: https://www.kaggle.com/datasets/justinas/nba-players-data
It record the player names, teams, age, and other statistics for each
individual season from 1996 season to 2020 season. (the index in this
table is not linked to the previous tables)
dim(Pstats)
## [1] 11700 22
str(Pstats)
## 'data.frame': 11700 obs. of 22 variables:
## $ X : int 0 1 2 3 4 5 6 7 8 9 ...
## $ player_name : chr "Travis Knight" "Matt Fish" "Matt Bullard" "Marty Conlon" ...
## $ team_abbreviation: chr "LAL" "MIA" "HOU" "BOS" ...
## $ age : num 22 27 30 29 22 22 36 26 33 32 ...
## $ player_height : num 213 211 208 211 206 ...
## $ player_weight : num 107 107 107 111 107 ...
## $ college : chr "Connecticut" "North Carolina-Wilmington" "Iowa" "Providence" ...
## $ country : chr "USA" "USA" "USA" "USA" ...
## $ draft_year : chr "1996" "1992" "Undrafted" "Undrafted" ...
## $ draft_round : chr "1" "2" "Undrafted" "Undrafted" ...
## $ draft_number : chr "29" "50" "Undrafted" "Undrafted" ...
## $ gp : int 71 6 71 74 42 9 70 31 70 82 ...
## $ pts : num 4.8 0.3 4.5 7.8 3.7 1.6 3.2 2 11.3 9.9 ...
## $ reb : num 4.5 0.8 1.6 4.4 1.6 0.7 2.7 1.2 2.6 4.8 ...
## $ ast : num 0.5 0 0.9 1.4 0.5 0.4 0.3 0 4.9 11.4 ...
## $ net_rating : num 6.2 -15.1 0.9 -9 -14.5 -3.5 3.5 -17.1 -3.1 -2 ...
## $ oreb_pct : num 0.127 0.143 0.016 0.083 0.109 0.087 0.092 0.109 0.023 0.035 ...
## $ dreb_pct : num 0.182 0.267 0.115 0.152 0.118 0.045 0.146 0.152 0.088 0.116 ...
## $ usg_pct : num 0.142 0.265 0.151 0.167 0.233 0.135 0.137 0.232 0.192 0.155 ...
## $ ts_pct : num 0.536 0.333 0.535 0.542 0.482 0.47 0.555 0.448 0.597 0.525 ...
## $ ast_pct : num 0.052 0 0.099 0.101 0.114 0.125 0.034 0.013 0.289 0.464 ...
## $ season : chr "1996-97" "1996-97" "1996-97" "1996-97" ...
For the data in salaries, player that change team in the middle of
the season will receive salary from two teams and thus two entries. I
will add up the salary if there is more than one entry for one unique
player name in one season.
I will use the team they were in at the end of the season as the Player
statistic data set uses the last team they were in.
salaries <- salaries %>%
group_by(player_id, season_start, season_end, season) %>%
summarise(salary = sum(salary), team = team[length(team)],
league = "NBA")
## `summarise()` has grouped output by 'player_id', 'season_start', 'season_end'.
## You can override using the `.groups` argument.
Since it is from the same source and the two tables are linked by a primary key (players.X_id) and foreign key (salaries.player_id). I will link them with merge column X_id and player_id.
player_salary <- merge(players, salaries, by.x = "X_id", by.y = "player_id")
dim(player_salary)
## [1] 13752 30
str(player_salary)
## 'data.frame': 13752 obs. of 30 variables:
## $ X_id : chr "abdelal01" "abdelal01" "abdelal01" "abdelal01" ...
## $ birthDate : chr "June 24, 1968" "June 24, 1968" "June 24, 1968" "June 24, 1968" ...
## $ birthPlace : chr "Cairo, Egypt" "Cairo, Egypt" "Cairo, Egypt" "Cairo, Egypt" ...
## $ career_AST : num 0.3 0.3 0.3 0.3 0.3 3.6 3.6 3.6 3.6 3.5 ...
## $ career_FG. : chr "50.2" "50.2" "50.2" "50.2" ...
## $ career_FG3. : chr "0.0" "0.0" "0.0" "0.0" ...
## $ career_FT. : chr "70.1" "70.1" "70.1" "70.1" ...
## $ career_G : int 256 256 256 256 256 1560 1560 1560 1560 586 ...
## $ career_PER : chr "13.0" "13.0" "13.0" "13.0" ...
## $ career_PTS : num 5.7 5.7 5.7 5.7 5.7 24.6 24.6 24.6 24.6 14.6 ...
## $ career_TRB : chr "3.3" "3.3" "3.3" "3.3" ...
## $ career_WS : chr "4.8" "4.8" "4.8" "4.8" ...
## $ career_eFG. : chr "50.2" "50.2" "50.2" "50.2" ...
## $ college : chr "Duke University" "Duke University" "Duke University" "Duke University" ...
## $ draft_pick : chr "25th overall" "25th overall" "25th overall" "25th overall" ...
## $ draft_round : chr "1st round" "1st round" "1st round" "1st round" ...
## $ draft_team : chr "Portland Trail Blazers" "Portland Trail Blazers" "Portland Trail Blazers" "Portland Trail Blazers" ...
## $ draft_year : chr "1990" "1990" "1990" "1990" ...
## $ height : chr "6-10" "6-10" "6-10" "6-10" ...
## $ highSchool : chr "Bloomfield in Bloomfield, New Jersey" "Bloomfield in Bloomfield, New Jersey" "Bloomfield in Bloomfield, New Jersey" "Bloomfield in Bloomfield, New Jersey" ...
## $ name : chr "Alaa Abdelnaby" "Alaa Abdelnaby" "Alaa Abdelnaby" "Alaa Abdelnaby" ...
## $ position : chr "Power Forward" "Power Forward" "Power Forward" "Power Forward" ...
## $ shoots : chr "Right" "Right" "Right" "Right" ...
## $ weight : chr "240lb" "240lb" "240lb" "240lb" ...
## $ season_start: int 1990 1991 1992 1993 1994 1984 1985 1987 1988 1990 ...
## $ season_end : int 1991 1992 1993 1994 1995 1985 1986 1988 1989 1991 ...
## $ season : chr "1990-91" "1991-92" "1992-93" "1993-94" ...
## $ salary : int 395000 494000 500000 805000 650000 1530000 2030000 2000000 3000000 1660000 ...
## $ team : chr "Portland Trail Blazers" "Portland Trail Blazers" "Boston Celtics" "Boston Celtics" ...
## $ league : chr "NBA" "NBA" "NBA" "NBA" ...
teams <- unique(player_salary$team)
teamsABB <- c("POR", "BOS", "SAC", "LAL", "DEN", "VAN", "DAL",
"ATL", "OKC", "DET", "LAC", "ORL", "HOU", "TOR", "NYK", "BKN",
"PHX", "NJN", "MEM", "WAS", "CHA", "MIA", "GSW", "CHA", "MIN",
"SAS", "NOP", "PHI", "WAS", "NOH", "CLE", "MIL", "CHI", "IND",
"SEA", "UTA", "NOK", "None", "KCK")
teamABBTab <- data.frame(team_name = teams, team_ABB = teamsABB)
player_salary$team_abbreviation <- sapply(player_salary$team,
function(x) teamABBTab$team_ABB[which(teamABBTab$team_name ==
x)])
player_salary <- rename(player_salary, player_name = name)
Pstats_salary <- merge(player_salary, Pstats, by = c("player_name",
"season", "team_abbreviation"))
dim(Pstats_salary)
## [1] 8625 50
str(Pstats_salary)
## 'data.frame': 8625 obs. of 50 variables:
## $ player_name : chr "A.C. Green" "A.C. Green" "A.C. Green" "A.C. Green" ...
## $ season : chr "1996-97" "1997-98" "1998-99" "1999-00" ...
## $ team_abbreviation: chr "DAL" "DAL" "DAL" "LAL" ...
## $ X_id : chr "greenac01" "greenac01" "greenac01" "greenac01" ...
## $ birthDate : chr "October 4, 1963" "October 4, 1963" "October 4, 1963" "October 4, 1963" ...
## $ birthPlace : chr "Portland, Oregon" "Portland, Oregon" "Portland, Oregon" "Portland, Oregon" ...
## $ career_AST : num 1.1 1.1 1.1 1.1 1.1 0 1.8 1.8 1.8 3 ...
## $ career_FG. : chr "49.4" "49.4" "49.4" "49.4" ...
## $ career_FG3. : chr "25.4" "25.4" "25.4" "25.4" ...
## $ career_FT. : chr "73.4" "73.4" "73.4" "73.4" ...
## $ career_G : int 1278 1278 1278 1278 1278 8 80 80 80 645 ...
## $ career_PER : chr "14.4" "14.4" "14.4" "14.4" ...
## $ career_PTS : num 9.6 9.6 9.6 9.6 9.6 1 5.5 5.5 5.5 9.7 ...
## $ career_TRB : chr "7.4" "7.4" "7.4" "7.4" ...
## $ career_WS : chr "99.5" "99.5" "99.5" "99.5" ...
## $ career_eFG. : chr "50.1" "50.1" "50.1" "50.1" ...
## $ college.x : chr "Oregon State University" "Oregon State University" "Oregon State University" "Oregon State University" ...
## $ draft_pick : chr "23rd overall" "23rd overall" "23rd overall" "23rd overall" ...
## $ draft_round.x : chr "1st round" "1st round" "1st round" "1st round" ...
## $ draft_team : chr "Los Angeles Lakers" "Los Angeles Lakers" "Los Angeles Lakers" "Los Angeles Lakers" ...
## $ draft_year.x : chr "1985" "1985" "1985" "1985" ...
## $ height : chr "6-9" "6-9" "6-9" "6-9" ...
## $ highSchool : chr "Benson Polytechnic in Portland, Oregon" "Benson Polytechnic in Portland, Oregon" "Benson Polytechnic in Portland, Oregon" "Benson Polytechnic in Portland, Oregon" ...
## $ position : chr "Power Forward and Small Forward" "Power Forward and Small Forward" "Power Forward and Small Forward" "Power Forward and Small Forward" ...
## $ shoots : chr "Right" "Right" "Right" "Right" ...
## $ weight : chr "220lb" "220lb" "220lb" "220lb" ...
## $ season_start : int 1996 1997 1998 1999 2000 1999 2000 2001 2002 2007 ...
## $ season_end : int 1997 1998 1999 2000 2001 2000 2001 2002 2003 2008 ...
## $ salary : int 4851000 5095088 5125088 1700000 2250000 118974 316969 465850 18748 972720 ...
## $ team : chr "Dallas Mavericks" "Dallas Mavericks" "Dallas Mavericks" "Los Angeles Lakers" ...
## $ league : chr "NBA" "NBA" "NBA" "NBA" ...
## $ X : int 300 549 1001 1691 2031 1711 2030 2623 2920 5164 ...
## $ age : num 33 34 35 36 37 23 23 24 25 23 ...
## $ player_height : num 206 206 206 206 206 ...
## $ player_weight : num 102 102 102 102 102 ...
## $ college.y : chr "Oregon State" "Oregon State" "Oregon State" "Oregon State" ...
## $ country : chr "USA" "USA" "USA" "USA" ...
## $ draft_year.y : chr "1985" "1985" "1985" "1985" ...
## $ draft_round.y : chr "1" "1" "1" "1" ...
## $ draft_number : chr "23" "23" "23" "23" ...
## $ gp : int 83 82 50 82 82 8 33 45 2 51 ...
## $ pts : num 7.2 7.3 4.9 5 4.5 1 6 5.4 0 5.2 ...
## $ reb : num 7.9 8.1 4.6 5.9 3.8 2.8 1.1 1 0 1.1 ...
## $ ast : num 0.8 1.5 0.5 1 0.5 0 1.9 1.8 1 1.7 ...
## $ net_rating : num -8 -7.2 -5.6 8.1 3.3 -32.6 -12.4 -3.8 -3.9 -0.5 ...
## $ oreb_pct : num 0.1 0.09 0.097 0.089 0.089 0.158 0.018 0.022 0 0.026 ...
## $ dreb_pct : num 0.207 0.196 0.179 0.179 0.171 0.208 0.053 0.067 0 0.085 ...
## $ usg_pct : num 0.119 0.118 0.148 0.111 0.141 0.146 0.169 0.221 0.282 0.224 ...
## $ ts_pct : num 0.523 0.496 0.441 0.482 0.492 0.19 0.495 0.477 0 0.535 ...
## $ ast_pct : num 0.045 0.074 0.043 0.058 0.05 0 0.198 0.248 0.4 0.249 ...
write.csv(Pstats_salary, "dataset/all.csv")
all <- read.csv("dataset/all.csv")
dim(all)
## [1] 8625 31
str(all)
## 'data.frame': 8625 obs. of 31 variables:
## $ X.1 : int 1 2 3 4 5 6 7 8 9 10 ...
## $ player_name : chr "A.C. Green" "A.C. Green" "A.C. Green" "A.C. Green" ...
## $ team_abbreviation: chr "DAL" "DAL" "DAL" "LAL" ...
## $ X_id : chr "greenac01" "greenac01" "greenac01" "greenac01" ...
## $ birthDate : chr "October 4, 1963" "October 4, 1963" "October 4, 1963" "October 4, 1963" ...
## $ birthPlace : chr "Portland, Oregon" "Portland, Oregon" "Portland, Oregon" "Portland, Oregon" ...
## $ college : chr "Oregon State University" "Oregon State University" "Oregon State University" "Oregon State University" ...
## $ draft_team : chr "Los Angeles Lakers" "Los Angeles Lakers" "Los Angeles Lakers" "Los Angeles Lakers" ...
## $ height : num 81 81 81 81 81 82 73 73 73 72 ...
## $ highSchool : chr "Benson Polytechnic in Portland, Oregon" "Benson Polytechnic in Portland, Oregon" "Benson Polytechnic in Portland, Oregon" "Benson Polytechnic in Portland, Oregon" ...
## $ position : chr "Power Forward and Small Forward" "Power Forward and Small Forward" "Power Forward and Small Forward" "Power Forward and Small Forward" ...
## $ shoots : chr "Right" "Right" "Right" "Right" ...
## $ weight : num 220 220 220 220 220 227 180 180 180 161 ...
## $ season_start : int 1996 1997 1998 1999 2000 1999 2000 2001 2002 2007 ...
## $ salary : int 4851000 5095088 5125088 1700000 2250000 118974 316969 465850 18748 972720 ...
## $ X : int 300 549 1001 1691 2031 1711 2030 2623 2920 5164 ...
## $ age : int 33 34 35 36 37 23 23 24 25 23 ...
## $ country : chr "USA" "USA" "USA" "USA" ...
## $ draft_year : chr "1985" "1985" "1985" "1985" ...
## $ draft_round : chr "1" "1" "1" "1" ...
## $ draft_number : chr "23" "23" "23" "23" ...
## $ gp : int 83 82 50 82 82 8 33 45 2 51 ...
## $ pts : num 7.2 7.3 4.9 5 4.5 1 6 5.4 0 5.2 ...
## $ reb : num 7.9 8.1 4.6 5.9 3.8 2.8 1.1 1 0 1.1 ...
## $ ast : num 0.8 1.5 0.5 1 0.5 0 1.9 1.8 1 1.7 ...
## $ net_rating : num -8 -7.2 -5.6 8.1 3.3 -32.6 -12.4 -3.8 -3.9 -0.5 ...
## $ oreb_pct : num 0.1 0.09 0.097 0.089 0.089 0.158 0.018 0.022 0 0.026 ...
## $ dreb_pct : num 0.207 0.196 0.179 0.179 0.171 0.208 0.053 0.067 0 0.085 ...
## $ usg_pct : num 0.119 0.118 0.148 0.111 0.141 0.146 0.169 0.221 0.282 0.224 ...
## $ ts_pct : num 0.523 0.496 0.441 0.482 0.492 0.19 0.495 0.477 0 0.535 ...
## $ ast_pct : num 0.045 0.074 0.043 0.058 0.05 0 0.198 0.248 0.4 0.249 ...
sal_year <- all %>%
group_by(season_start) %>%
summarise(median = median(salary), mean = mean(salary), sd = sd(salary))
fig1 <- plot_ly(sal_year, x = ~season_start, y = ~median, name = "Median salary",
type = "scatter", mode = "lines") %>%
add_trace(y = ~mean, name = "Mean salary") %>%
layout(title = "Salary in NBA from 1996-97 season to 2017-18 season",
yaxis = list(title = "Salary (USD)"), xaxis = list(title = "Season"))
fig1
fig2 <- plot_ly(sal_year, x = ~season_start, y = ~sd, name = "standard deviation",
type = "scatter", mode = "lines") %>%
layout(title = "Standard Deviation of NBA Salary")
fig2
fig3 <- plot_ly(data = all, x = ~season_start, y = ~salary, type = "box")
fig3
inflation <- read.csv("files/inflation.csv")
all <- merge(all, inflation[, c(1, 3)], by.x = "season_start",
by.y = "year")
all$salary_infl <- all$salary/all$pct
sal_year <- all %>%
group_by(season_start) %>%
summarise(median = median(salary_infl), mean = mean(salary_infl),
sd = sd(salary_infl))
fig1 <- plot_ly(sal_year, x = ~season_start, y = ~median, name = "Median salary",
type = "scatter", mode = "lines") %>%
add_trace(y = ~mean, name = "Mean salary") %>%
layout(title = "Salary in NBA from 1996-97 season to 2017-18 season",
yaxis = list(title = "Salary (USD)"), xaxis = list(title = "Season"))
fig1
fig2 <- plot_ly(sal_year, x = ~season_start, y = ~sd, name = "standard deviation",
type = "scatter", mode = "lines") %>%
layout(title = "Standard Deviation of NBA Salary")
fig2
fig3 <- plot_ly(data = all, x = ~season_start, y = ~salary, type = "box")
fig3
I will split year into decades for better visualization
all$decade <- cut(all$season_start, breaks = c(1990, 2000, 2010,
2020))
levels(all$decade) <- c("1990-2000", "2000-2010", "2010-2020")
plot_ly(data = all, x = ~decade, y = ~salary_infl, type = "box")
Position:
The position the player play
I will create a variable recording the players’ primary position
pos <- paste0(all$position, " ,") %>%
str_replace_all(pattern = "and", replacement = "&") %>%
str_extract(pattern = "^[a-zA-Z ]+(,|&)")
pos <- sapply(as.character(pos), function(x) substring(x, first = 1,
last = nchar(x) - 2))
all$pri_position <- pos
all$mult_pos <- 0
all$mult_pos[grep("and", all$position)] <- 1
all$pri_position <- factor(all$pri_position, levels = c("Point Guard",
"Shooting Guard", "Small Forward", "Power Forward", "Center"))
ggplot(all, aes(x = pri_position, y = salary_infl, col = decade)) +
geom_boxplot()
All position shows a general increasing trend except Center. Although
the median of centers’ remain similar in 2010s comparing to 2000s, the
upper outliers drop significantly in 2010s. While the median of the
small forward remain similar, the upper outlier plummeted during the
period ## Important numeric variables
numVar <- which(sapply(all, is.numeric))
numVarName <- names(numVar)
There are 20 numeric variables
all_numVar <- all[, numVar]
all_numVar <- select(all_numVar, !salary)
cor_numVar <- cor(all_numVar, use = "pairwise.complete.obs")
cor_sorted <- as.matrix(sort(cor_numVar[, "salary_infl"], decreasing = TRUE))
cor_high <- row.names(cor_sorted)[1:10]
cor_numVar <- cor_numVar[cor_high, cor_high]
corrplot.mixed(cor_numVar, tl.pos = "lt")
Point:
Average number of points scored
The points and salary correlation for each decade is very similar.
g1 <- ggplot(all, aes(x = pts, y = salary_infl, col = decade)) +
geom_point() + geom_smooth(col = "black", method = "gam",
formula = y ~ x) + facet_grid(decade ~ .) + labs(x = "points per game",
y = "salary after inflation (USD)", title = "point vs salary of each decade")
g1
There is no significant change in points per game in each position
g2 <- ggplot(all, aes(x = as.factor(season_start), y = pts, fill = pri_position)) +
geom_boxplot() + facet_grid(pri_position ~ .) + labs(x = "Year",
y = "points per game", title = "points per game throughout the years for each position")
g2
Rebounds:
Average number of rebounds grabbed
g3 <- ggplot(all, aes(x = reb, y = salary_infl, col = pri_position)) +
geom_point() + geom_smooth(col = "black", method = "gam",
formula = y ~ x) + facet_grid(pri_position ~ .) + labs(x = "rebounds per game",
y = "salary after inflation (USD)", title = "rebounds vs salary of each decade")
g3
g4 <- ggplot(all, aes(x = reb, fill = pri_position)) + geom_histogram(binwidth = 1) +
facet_grid(decade ~ .) + labs(x = "rebounds per game", y = "frequency",
title = "rebounds per game for each decade") + theme(legend.position = "bottom")
g4
cor_numVar <- cor(all_numVar[which(all$decade == "1990-2000"),
], use = "pairwise.complete.obs")
cor_high <- names(sort(cor_numVar[, "salary_infl"], decreasing = TRUE))
cor_numVar2000 <- cor_numVar[cor_high, cor_high]
corrplot.mixed(cor_numVar2000, tl.pos = "lt")
cor_numVar <- cor(all_numVar[which(all$decade == "2000-2010"),
], use = "pairwise.complete.obs")
cor_high <- names(sort(cor_numVar[, "salary_infl"], decreasing = TRUE))
cor_numVar1990 <- cor_numVar[cor_high, cor_high]
corrplot.mixed(cor_numVar2000, tl.pos = "lt")
cor_numVar <- cor(all_numVar[which(all$decade == "2010-2020"),
], use = "pairwise.complete.obs")
cor_high <- names(sort(cor_numVar[, "salary_infl"], decreasing = TRUE))
cor_numVar1990 <- cor_numVar[cor_high, cor_high]
corrplot.mixed(cor_numVar1990, tl.pos = "lt")